/*********************************************************************
Replication code for Systemic Discrimination Among Large U.S. Employers
Patrick M. Kline, Evan K. Rose, Christopher R. Walters
April, 2022

This file produces several of the estimates of main effects in the paper,
including Tables 1-3, Figure 2, and Figures A2, A5, and A6.

It requires top-level directory be set to the replication folder using
the global below.
*********************************************************************/


*** 0) Set directories and macros and load data
global dir "/accounts/projects/pkline/randres/randres/replication"
capture restore
clear all
set seed 126312
use ${dir}/data/data.dta, clear


*** Table 1: Summary stats for balanced and unblanced sample
* Indicators for waves and regions
foreach k of numlist 1/4 {
    gen region4_`k' = region4 == `k'
}
foreach k of numlist 1/5 {
    gen wave`k' = wave == `k'
}

* Produce table
eststo clear
foreach bal of numlist 0/1 {
    foreach b of numlist 0/1 {
        eststo: quietly estpost summarize female over40 lgbtq_club academic_club political_club gender_neutral_pronouns same_gender_pronouns associates region4_1 region4_2 region4_3 region4_4 wave1 wave2 wave3 wave4 wave5 cb call_cb email_cb text_cb any_cb_0_14 any_cb_15_30 if balanced >= `bal' & black == `b'
    }
    eststo: quietly estpost ttest cb call_cb email_cb text_cb any_cb_0_14 any_cb_15_30 female over40 lgbtq_club academic_club political_club gender_neutral_pronouns same_gender_pronouns associates wave1 wave2 wave3 wave4 wave5 region4_1 region4_2 region4_3 region4_4 if balanced >= `bal', by(black) unequal 
    distinct firm_id if balanced >= `bal'
    estadd scalar nfirms r(ndistinct)
    distinct job_id if balanced >= `bal'
    estadd scalar njobs r(ndistinct)
    if `bal' == 0 {
        foreach k of numlist 1/4 {
            distinct firm_id if nwaves == `k'
            estadd scalar wave`k' r(ndistinct)        
        }
    }
}
esttab using ${dir}/tables/table1.tex, replace cells("mean(pattern(1 1 0 1 1 0) fmt(3)) b(star pattern(0 0 1 0 0 1) fmt(3))") scalars("njobs N jobs" "nfirms N firms" "wave1 \quad 1 wave" "wave2 \quad 2 waves" "wave3 \quad 3 waves" "wave4 \quad 4 waves") coeflabel(cb "\quad Any contact in 30 days" email_cb "\quad\quad Email" call_cb "\quad\quad Voicemail" text_cb "\quad\quad Text" any_cb_0_14 "\quad Any contact in 14 days" any_cb_15_30 "\quad Any contact in 15-30 days" female "\quad Female" over40 "\quad Over 40" lgbtq_club "\quad LGBTQ club member" academic_club "\quad Academic club" political_club "\quad Political club" gender_neutral_pronouns "\quad Gender-neutral pronouns" same_gender_pronouns "\quad Same-gender pronouns" associates "\quad Associate degree" region4_1 "\quad Northeast" region4_2 "\quad Midwest" region4_3 "\quad South" region4_4 "\quad West" wave1 "\quad Wave 1" wave2 "\quad Wave 2" wave3 "\quad Wave 3" wave4 "\quad Wave 4" wave5 "\quad Wave 5") nonumber mtitle("White" "Black" "Difference" "White" "Black" "Difference") mgroups("A. All firms" "B. Balanced sample", pattern(1 0 0 1 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) star(* 0.10 ** 0.05 *** 0.01) modelwidth(15)


*** Table 2: Main effects in LPM and Logit
eststo clear
foreach bal of numlist 0/1 {
    eststo: reg cb black female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates ib(1).wave i.region4 if balanced >= `bal', cluster(job_id)
    eststo: logit cb black female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates ib(1).wave i.region4  if balanced >= `bal', cluster(job_id)
}
esttab using ${dir}/tables/table2.tex, replace nogaps se drop(1.wave 1.region4) order(black female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates 1.region4 2.region4 3.region4 4.region4) mtitles("LPM" "Logit" "LPM" "Logit") coeflabel(non_lgbtq_club "Academic/political club" lgbtq_club "LGBTQ club" academic_club "Academic club" political_club "Political club" same_gender_pronouns "Same-gender pronouns" gender_neutral_pronouns "Gender-neutral pronouns" black "Black" female "Female" over40 "Over 40" associates "Associate degree" _cons "Constant" 1.region4 "Northeast" 2.region4 "Midwest" 3.region4 "South" 4.region4 "West" 2.wave "Wave 2" 3.wave "Wave 3" 4.wave "Wave 4" 5.wave "Wave 5") mgroups("A. All firms" "B. Balanced sample", pattern(1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) star(* 0.10 ** 0.05 *** 0.01)


*** Table 3: Interacted main effects
eststo clear
foreach method in "reg" "logit" {
    `method' cb black female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates i.wave i(1).black#i(1)(female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates) i(2 3 4 5).wave#ib(0).black, cluster(job_id)
    mat tmp = r(table)
    local firstcol = colnumb(tmp,"female")
    local lastcol = colnumb(tmp,"associates")
    local bcol = colnumb(tmp,"_cons")
    matrix b = tmp[1,`firstcol'..`lastcol'] , tmp[1,`bcol']
    matrix colnames b = female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates _cons
    matrix coleq b = cb
    ereturn post b
    matrix se = tmp[2,`firstcol'..`lastcol'] , tmp[2,`bcol']
    matrix colnames se = female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates _cons
    matrix coleq se = cb
    estadd matrix se
    eststo 

    `method' cb white female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates i.wave i(0).black#i(1)(female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates) i(2 3 4 5).wave#ib(0).black, cluster(job_id)
    local Nobs = e(N)
    mat tmp = r(table)
    testparm i(0).black#i(1)(female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates)
    if "`method'" == "reg" {
        local F = r(F)
        local q = r(df)
        local chi2 = `F'*`q'
        local chi2p = 1-chi2(`q',`chi2')
    }
    else {
        local chi2 = r(chi2)
        local chi2p = r(p)
    }
    local firstcol = colnumb(tmp,"female")
    local lastcol = colnumb(tmp,"associates")
    local bcol = colnumb(tmp,"_cons")
    matrix b = tmp[1,`firstcol'..`lastcol'] , tmp[1,`bcol']
    matrix colnames b = female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates _cons
    matrix coleq b = cb
    ereturn post b
    matrix se = tmp[2,`firstcol'..`lastcol'] , tmp[2,`bcol']
    matrix colnames se = female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates _cons
    matrix coleq se = cb
    estadd matrix se
    eststo 

    local firstcol = colnumb(tmp,"0.black#1.female")
    local lastcol = colnumb(tmp,"0.black#1.associates")
    local bcol = colnumb(tmp,"white")
    matrix b = tmp[1,`firstcol'..`lastcol'] , tmp[1,`bcol']
    matrix colnames b = female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates _cons
    matrix coleq b = cb
    ereturn post b
    matrix se = tmp[2,`firstcol'..`lastcol'] , tmp[2,`bcol']
    matrix colnames se = female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates _cons
    matrix coleq se = cb
    estadd matrix se
    estadd scalar N = `Nobs'
    estadd scalar chi2 = `chi2'
    estadd scalar chi2p = `chi2p'
    eststo
}

esttab using ${dir}/tables/table3.tex, replace nogaps se scalars("N N" "chi2 $\chi^2$ stat for joint significance"  "chi2p \quad p-value") order(female over40 political_club academic_club lgbtq_club same_gender_pronouns gender_neutral_pronouns associates) mtitles("White" "Black" "Difference" "White" "Black" "Difference") coeflabel(non_lgbtq_club "Academic/political club" lgbtq_club "LGBTQ club" academic_club "Academic club" political_club "Political club" same_gender_pronouns "Same-gender pronouns" gender_neutral_pronouns "Gender-neutral pronouns" female "Female" over40 "Over 40" associates "Associate degree" region4_2 "Midwest" region4_3 "South" region4_4 "West" _cons "Constant" 2.wave "Wave 2" 3.wave "Wave 3" 4.wave "Wave 4" 5.wave "Wave 5") mgroups("OLS" "Logit", pattern(1 0 0 1 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) star(* 0.10 ** 0.05 *** 0.01)



*** Figure 2: Contact rates over time
preserve
capture drop subdt
gen subdt = clock(submitted_date,"YMDhms")
format subdt %tc
gen subyear = yofd(dofc(subdt))
gen submonth = month(dofc(subdt))
gen monthdt = mdy(submonth,1,subyear)
gen black_cb = cb if black == 1
gen white_cb = cb if black == 0
gen black_cb_balanced = black_cb if balanced
gen white_cb_balanced = white_cb if balanced

collapse (mean) black_cb* white_cb* wave* (count) napps=cb, by(monthdt subyear submonth)
keep if napps > 100
gen logcb = (white_cb - black_cb)/black_cb*100
gen logcb_balanced = (white_cb_balanced - black_cb_balanced)/black_cb_balanced*100

twoway (scatter white_cb monthdt, msym(O) mc(navy%70)) (scatter black_cb monthdt, msym(D) mc(maroon%70)) (line logcb monthdt if monthdt <= 21975, msym(D) lc(dkgreen%70) yaxis(2)) (line logcb monthdt if monthdt >= 22128, msym(D) lc(dkgreen%70) yaxis(2)) (line logcb_balanced monthdt if monthdt <= 21975, msym(D) lc(dkblue%70) lp(dash)  yaxis(2)) (line logcb_balanced monthdt if monthdt >= 22128, msym(D) lc(dkblue%70) lp(dash) yaxis(2)), xsize(8) ysize(4) graphregion(color(white)) xtitle("Submission date") ytitle("Mean contact rates") ytitle("White-Black % contact gap", axis(2)) yscale(range(-10 30) axis(2)) ylabel(-10(10)30, axis(2)) legend(order(1 "White applications" 3 "White-Black % gap full sample" 2 "Black applications"  5 "White-Black % gap balanced sample") region(color(white))) xticks(`=d(1oct2019)' `=d(1jan2020)' `=d(1apr2020)' `=d(1jul2020)' `=d(1oct2020)' `=d(1jan2021)') xlabel(`=d(1oct2019)' "Oct 19" `=d(1jan2020)' "Jan 20" `=d(1apr2020)' "Apr 20" `=d(1jul2020)' "Jul 20" `=d(1oct2020)' "Oct 20" `=d(1jan2021)' "Jan 21" `=d(1apr2021)' "Apr 21") xline(`=d(17may2020)', lwidth(43)  lc(gs12%30)) xline(`=d(19mar2020)', lp(dash) lc(black%40)) xline(`=d(10june2020)', lp(dash) lc(black%40)) xline(`=d(25may2020)', lp(dash) lc(black%40)) 
graph export ${dir}/figures/figure2.pdf, replace
restore


*** Figure A2: Kaplan-Meier estimates of contact probability and smoothed hazard
replace days_to_cb = 31 if cb == 0
replace days_to_cb = 0.001 if days_to_cb == 0
stset days_to_cb, failure(cb==1)
sts graph, by(black) failure graphregion(color(white)) ci xtitle("Days since application") ytitle("Pr(any contact)") title("") yscale(range(0 0.3)) ylabel(0(0.05).3) legend(order(2 "White" 4 "Black") region(color(white)) cols(2))
graph export ${dir}/figures/figureA2a.pdf, replace

sts graph, by(black) hazard noboundary graphregion(color(white)) ci xtitle("Days since application") ytitle("Contact hazard") title("") legend(order(2 "White" 4 "Black") region(color(white)) cols(2))
graph export ${dir}/figures/figureA2b.pdf, replace


*** Figure A5: Contact rates by age category
xtile agebin=age_at_sub, nq(5)
bys agebin: egen mean_age=mean(age_at_sub)

* Run reg
qui tab agebin, ge(a_)
local test=""
foreach x of varlist a_* {
    local test "`test' (a_1=`x')"
}
qui tab wave, ge(wave_)
areg cb a_*, r cluster(job_id) absorb(wave)
test `test'
local F=round(r(F),0.1)
local p=round(r(p),0.001)
gen b=.
gen s=.
foreach x of varlist a_* {
    lincom _b[`x']+_b[_cons]
    replace b=r(estimate) if `x'==1
    replace s=r(se) if `x'==1
}

* Plot results
local F=round(`F',0.1)
preserve
bys agebin: keep if _n==1
gen u_ci=b+1.645*s
gen l_ci=b-1.645*s
graph twoway scatter b mean_age,  msymbol(oh) mcolor(navy) connect(l) lcolor(navy) ///
    || line u_ci mean_age, lcolor(black) lpattern(dash) ///
    || line l_ci mean_age, lcolor(black) lpattern(dash) ///
    scheme(s1color) xtitle("Average age") ytitle("Contact rate") ///
    legend(off) xlabel(25(5)55) ///
    text(0.253 49 "Test for equality: {it:F} = `F', {it:p} = 0`p'")
graph export ${dir}/figures/figureA5.pdf, replace
restore


*** Figure A6: Stability of firm contact gaps across waves
preserve
gen under40 = 1 - over40
foreach var of varlist black white female male over40 under40 {
    gen `var'_cb = cb if `var' == 1
}

collapse (mean) black_cb white_cb female_cb male_cb over40_cb under40_cb (count) napps=cb, by(wave firm_id)
gen race_dif = white_cb - black_cb
gen gender_dif = male_cb - female_cb
gen age_dif = under40_cb - over40_cb

foreach var of varlist race_dif gender_dif age_dif {
    capture drop tot_dif* n_dif*
    bys firm_id: egen tot_dif = total(`var')
    bys firm_id: egen n_dif = count(`var')
    bys firm_id wave: egen tot_dif_wave = total(`var')
    bys firm_id wave: egen n_dif_wave = count(`var')
    gen `var'_leaveout = (tot_dif - tot_dif_wave) / (n_dif - n_dif_wave)
}

foreach var of varlist race_dif gender_dif age_dif {
    gen `var'_dm = .
    gen `var'_leaveout_dm = .

    foreach w of numlist 1/5 {
        su `var' if wave == `w'
        *replace `var'_dm = `var' - r(mean) if wave == `w'
        replace `var'_dm = `var' if wave == `w'
        su `var'_leaveout if wave == `w'
        *replace `var'_leaveout_dm = `var'_leaveout - r(mean) if wave == `w'
        replace `var'_leaveout_dm = `var'_leaveout if wave == `w'
    }
}

* Make graphs
local k = 1
foreach var of varlist race_dif gender_dif age_dif {
	if "`var'" == "gender_dif" {
		local outlist "gender_dif race_dif"
	}
	else {
		local outlist "`var'"	
	}
	foreach out of local outlist {
	    reg `out'_dm `var'_leaveout_dm i.wave [w=napps], cluster(firm_id)
	    local slope = _b[`var'_leaveout_dm]
	    local cons = _b[_cons]
	    local beta: display %5.4f e(b)[1,1]
	    local se: display %5.4f r(table)[2,1]

	    capture drop xq*
	    xtile xq = `var'_leaveout_dm, nquantiles(25)
	    egen xqt = tag(xq) if `var'_leaveout_dm != . & `var'_dm != .
	    capture drop mean_*
	    bys xq: egen mean_y = mean(`out'_dm)
	    bys xq: egen mean_x = mean(`var'_leaveout_dm)

	    if "`var'" == "race_dif" {
	        local xtit = "Leave-wave-out white-Black contact gap"
	    }
	    else if "`var'" == "gender_dif" {
	        local xtit = "Leave-wave-out male-female contact gap"        
	    }
	    else {
	        local xtit = "Leave-wave-out under-over 40 contact gap"        
	    }
	    if "`out'" == "race_dif" {
	        local ytit = "Wave white-Black contact gap"
	    }
	    else if "`out'" == "gender_dif" {
	        local ytit = "Wave male-female contact gap"
	    }
	    else {
	        local ytit = "Wave under-over 40 contact gap"            
	    }
	    twoway (scatter mean_y mean_x if xqt, msym(Oh) color(navy%70)) (function y = `cons' + `slope'*x, lc(maroon%80) ra(mean_x)) (function y = x, lp(dash) lc(black%80) ra(mean_x)), xtitle(`xtit') ytitle(`ytit') graphregion(color(white)) note("Slope: `beta' (`se')", ring(0) pos(10)) legend(order(2 "Regression slope" 3 "45 degree line") region(color(white)))
	    graph export ${dir}/figures/figureA6_`k'.pdf, replace

	local ++k
	}
}
restore


